Read in Data

Q1 = read_csv(here("/courses","EDS214", "ShaleCoRyJoe","data", "QuebradaCuenca1-Bisley.csv"), na = "-9999")

Q2 = read_csv(here("/courses","EDS214", "ShaleCoRyJoe","data", "QuebradaCuenca2-Bisley.csv"), na = "-9999")

Q3 = read_csv(here("/courses","EDS214", "ShaleCoRyJoe","data", "QuebradaCuenca3-Bisley.csv"), na = "-9999")

PRM = read_csv(here("/courses","EDS214", "ShaleCoRyJoe","data", "RioMameyesPuenteRoto.csv"), na = "-9999")

Find how many NA values are in temp across 4 datasets

PRM_temp <- PRM %>% 
  count(Temp == "NA")

Q1_temp <- Q1 %>% 
  count(Temp == "NA")

Q2_temp <- Q2 %>% 
  count(Temp == "NA")

Q3_temp <- Q3 %>% 
  count(Temp == "NA")


# It seems that 3/4 of the data values have a value for temperature, we consider that enough to continue exploration

Cleaning data and including temperature

Q1.2 = Q1 %>% 
  clean_names() %>% 
  select(sample_id, sample_date, temp) %>% 
  mutate(sample_date = mdy(sample_date)) %>% 
  group_by(sample_id, sample_date) %>% 
  summarize(temp_daily_mean = mean(temp))

Q2.2 = Q2 %>% 
  clean_names() %>% 
  select(sample_id, sample_date, temp) %>% 
  mutate(sample_date = mdy_hm(sample_date)) %>% 
  group_by(sample_id, sample_date) %>% 
  summarize(temp_daily_mean = mean(temp))

Q3.2 = Q3 %>% 
  clean_names() %>% 
  select(sample_id, sample_date, temp) %>% 
  mutate(sample_date = mdy(sample_date)) %>% 
  group_by(sample_id, sample_date) %>% 
  summarize(temp_daily_mean = mean(temp))

PRM.2 = PRM %>% 
  clean_names() %>% 
  select(sample_id, sample_date, temp) %>% 
  mutate(sample_date = mdy(sample_date)) %>% 
  mutate(sample_id = str_replace(string = sample_id, pattern = "MPR", replacement = "PRM")) %>%
  group_by(sample_id, sample_date) %>% 
  summarize(temp_daily_mean = mean(temp))

Combining Temperature Data from the 4 Datasets

combined_data_temp <- rbind(Q1.2, Q2.2, Q3.2, PRM.2)

Plot Combined Temperature Data

# eliminate 1 major outlier that was recorded incorrectly (>1000 degrees celcius)

combined_data_temp <- combined_data_temp %>% 
  filter(temp_daily_mean < 1000, temp_daily_mean > 10)
write_csv(combined_data_temp, "combined_data_temp.csv")

combined_data_temp_plot <- ggplot(data = combined_data_temp, aes(x = sample_date, y = temp_daily_mean, color = sample_id), na.rm = TRUE) +
  geom_line() + 
  facet_wrap(vars(sample_id))

combined_data_temp_plot

plotly_plot <- ggplotly(combined_data_temp_plot)

plotly_plot

Climate change temperature plotly 2000 - 2012

combined_data_temp_climate <- combined_data_temp %>% 
  filter(sample_date >= as.Date("2000-01-01"))

combined_data_temp_climate_plot <- ggplot(data = combined_data_temp_climate,
                                          aes(x = sample_date, 
                                              y = temp_daily_mean,
                                              color = sample_id)) +
  geom_line() +
  facet_wrap(vars(sample_id)) +
  labs(
    title = "Luquillo Watershed Temperature Data 2000 - 2012",
    x = "Temperature (c)", 
    y = "Date"
  ) +
   theme(plot.title = element_text(hjust = 0.5))

combined_data_temp_climate_plot

combined_data_temp_climate_plotly <- ggplotly(combined_data_temp_climate_plot)

combined_data_temp_climate_plotly